library(ggplot2)
library(plotly)
library(GGally)
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
# Load the CPS data set from the working directory into a data frame.
df <- read.csv(file = "cps.csv")

# Preview the first rows of the data.
head(x = df)

# Report how many rows (instances) the data frame holds.
cat("Number of instances : ", nrow(df))
Number of instances : 534
# Report how many columns (attributes) the data frame holds.
cat("\nNumber of attributes : ", ncol(df))
Number of attributes : 11
# Show each column's type together with its first few values.
str(object = df)
'data.frame': 534 obs. of 11 variables:
$ wage : num 9 5.5 3.8 10.5 15 9 9.57 15 11 5 ...
$ educ : int 10 12 12 12 12 16 12 14 8 12 ...
$ race : chr "W" "W" "W" "W" ...
$ sex : chr "M" "M" "F" "F" ...
$ hispanic: chr "NH" "NH" "NH" "NH" ...
$ south : chr "NS" "NS" "NS" "NS" ...
$ married : chr "Married" "Married" "Single" "Married" ...
$ exper : int 27 20 4 29 40 27 5 22 42 14 ...
$ union : chr "Not" "Not" "Not" "Not" ...
$ age : int 43 38 22 47 58 49 23 42 56 32 ...
$ sector : chr "const" "sales" "sales" "clerical" ...
Find the distribution of wage
## Kernel density plot
# Draw the smoothed (kernel density) distribution of the `wage` column.
# The title names the variable actually plotted (wage).
density_plot <- ggplot(df, aes(x = wage)) +
  geom_density(fill = "indianred3") +
  labs(
    x = "wage",
    y = "density",
    title = "Kernel density of wage"
  )
density_plot
Find the distribution of wage with respect to race
## Bar plot of wage against race
# NOTE(review): a bar trace is given one wage value per row here, so bars that
# share a race category presumably overlap; a box plot (type = "box") may show
# the distribution more faithfully -- confirm the intended chart type.
bar_plot <- plot_ly(
  data = df,
  x = ~race,
  y = ~wage,
  type = "bar"
)
bar_plot
Is there a correlation between age and wage?
# Pearson correlation coefficient between wage and age.
x <- df$wage  # numeric
y <- df$age   # integer
cat(cor(x, y, method = "pearson"))
0.1769669
Does the wage differ with marital status?
## Bar plot of wage by marital status, coloured by sex
# `colors` is given explicitly because `sex` has only two levels: the default
# palette lookup (RColorBrewer "Set2") requires at least three levels and
# otherwise emits a warning for every trace.
bar_plot <- plot_ly(
  data = df,
  x = ~married,
  y = ~wage,
  color = ~sex,
  colors = c("#66C2A5", "#8DA0CB"),
  type = "bar"
)

# Axis and legend titles are set through `title = list(text = ...)`; a bare
# `text` entry on an axis is not a title attribute and leaves the axis
# unlabeled. The labels name the variables actually plotted.
bar_plot %>% layout(
  title = list(text = "Wage by Marital Status and Gender"),
  legend = list(title = list(text = "Gender")),
  xaxis = list(title = list(text = "Marital Status")),
  yaxis = list(title = list(text = "Wage"))
)
Warning in RColorBrewer::brewer.pal(N, "Set2") :
minimal value for n is 3, returning requested palette with 3 different levels
Warning in RColorBrewer::brewer.pal(N, "Set2") :
minimal value for n is 3, returning requested palette with 3 different levels
Warning in RColorBrewer::brewer.pal(N, "Set2") :
minimal value for n is 3, returning requested palette with 3 different levels
Warning in RColorBrewer::brewer.pal(N, "Set2") :
minimal value for n is 3, returning requested palette with 3 different levels
# Work on a copy of the built-in iris data set.
df2 <- iris

# Scatter plot of sepal width against sepal length; point colour follows
# Sepal.Length and point shape follows Species.
plot <- ggplot(
  data = df2,
  mapping = aes(
    x = Sepal.Length,
    y = Sepal.Width,
    color = Sepal.Length,
    shape = Species
  )
) +
  geom_point() +
  labs(
    title = "Sepal Length and Sepal Width",
    x = "Sepal Length",
    y = "Sepal Width"
  )
plot

# Pairwise plot matrix of the first four columns, coloured by Species, with
# the upper triangle left blank.
ggpairs(
  df2,
  mapping = aes(color = Species),
  columns = 1:4,
  upper = "blank"
) +
  ggtitle("IRIS")
plot: [1,1] [===>--------------------------------------------------------------------] 6% est: 0s
plot: [1,2] [========>---------------------------------------------------------------] 12% est: 0s
plot: [1,3] [=============>----------------------------------------------------------] 19% est: 0s
plot: [1,4] [=================>------------------------------------------------------] 25% est: 0s
plot: [2,1] [=====================>--------------------------------------------------] 31% est: 0s
plot: [2,2] [==========================>---------------------------------------------] 38% est: 0s
plot: [2,3] [===============================>----------------------------------------] 44% est: 0s
plot: [2,4] [===================================>------------------------------------] 50% est: 0s
plot: [3,1] [=======================================>--------------------------------] 56% est: 0s
plot: [3,2] [============================================>---------------------------] 62% est: 0s
plot: [3,3] [=================================================>----------------------] 69% est: 0s
plot: [3,4] [=====================================================>------------------] 75% est: 0s
plot: [4,1] [=========================================================>--------------] 81% est: 0s
plot: [4,2] [==============================================================>---------] 88% est: 0s
plot: [4,3] [===================================================================>----] 94% est: 0s
plot: [4,4] [========================================================================]100% est: 0s
# Work on a copy of the built-in iris data set.
df4 <- iris

# Add the sepal-to-petal length ratio. The result is assigned back to df4:
# mutate() returns a new data frame, so without the assignment the preview
# below would not contain the `ratio` column.
df4 <- df4 %>% mutate(ratio = Sepal.Length / Petal.Length)
head(df4)

# Print the data without the Species column (df4 itself is not modified).
df4 %>% select(-Species)

# Print only the rows whose sepal width exceeds 3.5 (df4 itself is not modified).
df4 %>% filter(Sepal.Width > 3.5)